library("gridExtra")
library("tidyverse")
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.4
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::combine() masks gridExtra::combine()
## x dplyr::filter()  masks stats::filter()
## x dplyr::lag()     masks stats::lag()
library("ggplot2")
library("highcharter")
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use

First, load the data.

# Full 2019 World Happiness table.
df_2019 <- read.csv(
  "/Users/lubis/Documents/study/dataanalysis/hackathon/world-happiness/2019.csv",
  header = TRUE
)
# From the 2015 table keep only the Country and Region columns.
df_2015 <- select(
  read.csv(
    "/Users/lubis/Documents/study/dataanalysis/hackathon/world-happiness/2015.csv",
    header = TRUE
  ),
  Country, Region
)

Check the type of each column.

# Print the structure of both tables as loaded.
str(df_2019)
## 'data.frame':    156 obs. of  9 variables:
##  $ Overall.rank                : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Country.or.region           : Factor w/ 156 levels "Afghanistan",..: 44 37 106 58 99 134 133 100 24 7 ...
##  $ Score                       : num  7.77 7.6 7.55 7.49 7.49 ...
##  $ GDP.per.capita              : num  1.34 1.38 1.49 1.38 1.4 ...
##  $ Social.support              : num  1.59 1.57 1.58 1.62 1.52 ...
##  $ Healthy.life.expectancy     : num  0.986 0.996 1.028 1.026 0.999 ...
##  $ Freedom.to.make.life.choices: num  0.596 0.592 0.603 0.591 0.557 0.572 0.574 0.585 0.584 0.532 ...
##  $ Generosity                  : num  0.153 0.252 0.271 0.354 0.322 0.263 0.267 0.33 0.285 0.244 ...
##  $ Perceptions.of.corruption   : num  0.393 0.41 0.341 0.118 0.298 0.343 0.373 0.38 0.308 0.226 ...
str(df_2015)
## 'data.frame':    158 obs. of  2 variables:
##  $ Country: Factor w/ 158 levels "Afghanistan",..: 136 59 38 106 25 46 100 135 101 7 ...
##  $ Region : Factor w/ 10 levels "Australia and New Zealand",..: 10 10 10 10 6 10 10 10 1 1 ...
# Rename the 2019 country column so it matches the `Country` column of df_2015.
df_2019 <- df_2019 %>%
  rename(Country = Country.or.region)
# Print both structures again to confirm the rename.
str(df_2019)
## 'data.frame':    156 obs. of  9 variables:
##  $ Overall.rank                : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Country                     : Factor w/ 156 levels "Afghanistan",..: 44 37 106 58 99 134 133 100 24 7 ...
##  $ Score                       : num  7.77 7.6 7.55 7.49 7.49 ...
##  $ GDP.per.capita              : num  1.34 1.38 1.49 1.38 1.4 ...
##  $ Social.support              : num  1.59 1.57 1.58 1.62 1.52 ...
##  $ Healthy.life.expectancy     : num  0.986 0.996 1.028 1.026 0.999 ...
##  $ Freedom.to.make.life.choices: num  0.596 0.592 0.603 0.591 0.557 0.572 0.574 0.585 0.584 0.532 ...
##  $ Generosity                  : num  0.153 0.252 0.271 0.354 0.322 0.263 0.267 0.33 0.285 0.244 ...
##  $ Perceptions.of.corruption   : num  0.393 0.41 0.341 0.118 0.298 0.343 0.373 0.38 0.308 0.226 ...
str(df_2015)
## 'data.frame':    158 obs. of  2 variables:
##  $ Country: Factor w/ 158 levels "Afghanistan",..: 136 59 38 106 25 46 100 135 101 7 ...
##  $ Region : Factor w/ 10 levels "Australia and New Zealand",..: 10 10 10 10 6 10 10 10 1 1 ...
# Attach each country's Region from the 2015 table to the 2019 data.
# The key is named explicitly so the join does not depend on whichever
# columns happen to share a name, and both keys are converted to character
# first: joining factor keys with different level sets makes dplyr coerce
# them to character anyway, but with a warning. Countries with no match in
# df_2015 keep their row and get NA for Region (left join).
df_2019 <- left_join(
  mutate(df_2019, Country = as.character(Country)),
  mutate(df_2015, Country = as.character(Country)),
  by = "Country"
)
# Predictor columns that are each plotted against the happiness Score.
columns <- c("GDP.per.capita", "Social.support", "Healthy.life.expectancy",
             "Freedom.to.make.life.choices", "Generosity", "Perceptions.of.corruption")

# Build a scatter plot of Score against one predictor column.
#
# column: name of the predictor column, as a string.
# data:   data frame holding `column`, `Score` and `Region`
#         (defaults to df_2019).
#
# Returns a ggplot object: points coloured by Region (legend hidden) with a
# linear-model trend line on top.
plot_score_vs <- function(column, data = df_2019) {
  ggplot(data = data, mapping = aes(x = !!sym(column), y = Score)) +
    # alpha is a fixed value, so it is set outside aes(); inside aes() it
    # would be treated as a data mapping and sent through an alpha scale.
    geom_point(aes(color = Region), alpha = 0.5, show.legend = FALSE) +
    geom_smooth(method = "lm")
}

p1 <- plot_score_vs(columns[1])
p2 <- plot_score_vs(columns[2])
p3 <- plot_score_vs(columns[3])
p4 <- plot_score_vs(columns[4])
p5 <- plot_score_vs(columns[5])
p6 <- plot_score_vs(columns[6])

# Lay the six plots out in a grid of three equal-height rows.
grid.arrange(p1, p2, p3, p4, p5, p6, nrow = 3, heights = c(4, 4, 4))

# Per-region averages of happiness score and GDP per capita.
# Rows whose Region is NA form their own group in this summary.
region_means <- df_2019 %>%
  group_by(Region) %>%
  summarise(mean_score = mean(Score),
            mean_income = mean(GDP.per.capita))
## Warning: Factor `Region` contains implicit NA, consider using
## `forcats::fct_explicit_na`

# One point per region: mean score against mean GDP per capita.
# Named `mean_plot` (not `mean`) so the variable does not shadow base::mean().
mean_plot <- ggplot(region_means,
                    mapping = aes(x = mean_income, y = mean_score)) +
  geom_point(aes(color = Region))

# Score distribution per region: raw points with a transparent boxplot on top.
# Named `box_plot` (not `box`) so the variable does not shadow graphics::box().
box_plot <- ggplot(df_2019,
                   aes(x = Region, y = Score, colour = Region)) +
  geom_point(alpha = 0.7) +
  # Every observation is already drawn by geom_point(); hide the boxplot's
  # own outlier markers so those observations are not drawn twice.
  geom_boxplot(alpha = 0, colour = "black", outlier.shape = NA)

# Show the two plots side by side.
grid.arrange(mean_plot, box_plot, ncol = 2)

# Columns entering the correlation matrix, selected by name rather than by
# position so the selection survives columns being added or reordered.
cor_columns <- c("Score", "GDP.per.capita", "Social.support",
                 "Healthy.life.expectancy", "Freedom.to.make.life.choices",
                 "Generosity", "Perceptions.of.corruption")
# Pairwise correlations between the score and its predictors.
# Stored as `cor_matrix` (not `cor`) so the variable does not shadow stats::cor().
cor_matrix <- cor(df_2019[, cor_columns])
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
# Reshape the matrix to long form (Var1, Var2, value) for geom_tile().
map_cor <- melt(cor_matrix)
ggplot(data = map_cor, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "red")

# Load the `worldgeojson` dataset shipped with highcharter.
data(worldgeojson, package = "highcharter")

# Country/score pairs for the map; rows without a country name are dropped.
countries <- df_2019 %>%
  select(Country, Score) %>%
  filter(!is.na(Country))

# World map filled by happiness score; map features are matched to the data
# through the map's `name` property and the data's `Country` column.
highchart() %>%
  hc_add_series_map(
    worldgeojson,
    countries,
    value = "Score",
    joinBy = c("name", "Country")
  ) %>%
  hc_colors(c("darkorange", "darkgray")) %>%
  hc_colorAxis(stops = color_stops(), min = 2.5, max = 8) %>%
  hc_title(text = "Happiness Score")
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Scatter plot of Score against GDP per capita, coloured by Region.
p <- ggplot(
  df_2019,
  aes(x = GDP.per.capita, y = Score, color = Region)
) +
  geom_point() +
  theme_bw()

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)